library(here)
library(tidyverse)
library(cowplot)
library(GGally)
theme_set(theme_light(base_size = 14))

library(scuttle)
library(scater)
library(scran)

library(kableExtra)
library(DDCompanion)
library(iCOBRA)

Setup

## Directory setup
here_root <- "benchmarks/covid"
here::i_am(file.path(here_root, "analysis/covid-sim-results.Rmd"))
#> here() starts at /kyukon/data/gent/vo/000/gvo00063/Jeroen/DD_project_v2

data_dir <- here::here(here_root, "data")
stopifnot(dir.exists(data_dir))
res_dir <- here::here(here_root, "results")
stopifnot(dir.exists(res_dir))
  • Using methods: bGLM, qbGLM, qbGLM_offset, qbGLM_offset_squeeze, edgeR_NB, edgeR_QP, edgeR_NB_optim, edgeR_QP_optim
  • Using celltype: immature_B_cell_Moderate
  • Using prop_DE: 0.05
  • Using datatype: pb

Load results

res_files <- map(prop_DE, ~ get_sim_res_files(dataset = "covid", 
                                              methods = methods,
                                              datatype = datatype,
                                              prop_DE = .x, 
                                              celltype = celltype
)) %>% set_names(paste0("prop_DE_", prop_DE))

#res_list <- map_depth(res_files, 2, readRDS)
res_list <- lapply(res_files, function(element){
    lapply(element, readRDS)
})

## Check for expected data structure
stopifnot(unlist(map_depth(res_list, 3, ~ is.data.frame(.x$results))))

Load SCE objects

sce_files <- map(prop_DE, ~ get_SCE_files(
    dataset = "covid",
    which = "sim_replicates", prop_DE = .x,
    celltype = celltype
)) %>% set_names(paste0("prop_DE_", prop_DE))

sce_objects <- lapply(sce_files, readRDS)

Data overview

  • The results were generated from 1 simulation replicates
  • Each replicate was generated by randomly splitting the subjects in two mock groups
  • The same mock replicates were used for each prop_DE level. I.e. prop_DE_0.1$replicate1 and prop_DE_0.05$replicate1 will have the same mock group assignment
  • DE was introduced by swapping genes in one of the mock groups
  • Cells per subject were sub-sampled to 100 cells per subject
map_dfr(sce_objects,
    ~ map_dfr(.x, function(x) c(nrows = nrow(x), ncols = ncol(x)),
        .id = "replicate"
    ),
    .id = "prop_DE"
)

Subjects are divided across mock groups as follows:

lapply(sce_objects[[1]], function(x) table(x$donor_id, x$mock_group))
#> $replicate_1
#>            
#>               A   B
#>   CV0037      0  35
#>   CV0052      0  28
#>   CV0137     32   0
#>   CV0234      0  52
#>   CV0257     68   0
#>   MH8919331   0  50
#>   MH9143275   0  82
#>   MH9143322   0  20
#>   MH9143323   0  59
#>   MH9143324   0  30
#>   MH9143327  22   0
#>   MH9143423  75   0
#>   MH9143424 150   0
#>   MH9179821  27   0
#>   MH9179823   0  72
#>   MH9179824  75   0
#>   MH9179825  35   0
#> 
#> $replicate_2
#>            
#>               A   B
#>   CV0037      0  35
#>   CV0052     28   0
#>   CV0137      0  32
#>   CV0234     52   0
#>   CV0257     68   0
#>   MH8919331  50   0
#>   MH9143275  82   0
#>   MH9143322  20   0
#>   MH9143323  59   0
#>   MH9143324   0  30
#>   MH9143327  22   0
#>   MH9143423   0  75
#>   MH9143424   0 150
#>   MH9179821  27   0
#>   MH9179823   0  72
#>   MH9179824   0  75
#>   MH9179825   0  35
#> 
#> $replicate_3
#>            
#>               A   B
#>   CV0037     35   0
#>   CV0052     28   0
#>   CV0137     32   0
#>   CV0234      0  52
#>   CV0257      0  68
#>   MH8919331   0  50
#>   MH9143275   0  82
#>   MH9143322   0  20
#>   MH9143323   0  59
#>   MH9143324   0  30
#>   MH9143327  22   0
#>   MH9143423  75   0
#>   MH9143424   0 150
#>   MH9179821  27   0
#>   MH9179823  72   0
#>   MH9179824  75   0
#>   MH9179825  35   0
#> 
#> $replicate_4
#>            
#>               A   B
#>   CV0037     35   0
#>   CV0052     28   0
#>   CV0137      0  32
#>   CV0234      0  52
#>   CV0257     68   0
#>   MH8919331   0  50
#>   MH9143275  82   0
#>   MH9143322  20   0
#>   MH9143323   0  59
#>   MH9143324  30   0
#>   MH9143327  22   0
#>   MH9143423   0  75
#>   MH9143424   0 150
#>   MH9179821   0  27
#>   MH9179823   0  72
#>   MH9179824  75   0
#>   MH9179825  35   0
#> 
#> $replicate_5
#>            
#>               A   B
#>   CV0037     35   0
#>   CV0052      0  28
#>   CV0137      0  32
#>   CV0234      0  52
#>   CV0257     68   0
#>   MH8919331  50   0
#>   MH9143275   0  82
#>   MH9143322   0  20
#>   MH9143323  59   0
#>   MH9143324   0  30
#>   MH9143327   0  22
#>   MH9143423   0  75
#>   MH9143424   0 150
#>   MH9179821  27   0
#>   MH9179823  72   0
#>   MH9179824  75   0
#>   MH9179825  35   0

The number of DE and non-DE genes per replicate:

map(sce_objects, ~ map_dfr(.x, ~ table(rowData(.x)$is_DE), .id = "replicate"))
#> $prop_DE_0.05
#> # A tibble: 5 × 3
#>   replicate   `FALSE`     `TRUE`     
#>   <chr>       <table[1d]> <table[1d]>
#> 1 replicate_1 12021       633        
#> 2 replicate_2 12022       632        
#> 3 replicate_3 12021       633        
#> 4 replicate_4 12022       632        
#> 5 replicate_5 12022       632

t-SNE plots

tSNE_plots <- map(sce_objects, function(x) {
    p <- imap(x, function(sce, name) {
        plotTSNE(sce, colour_by = "mock_group") +
            ggtitle(name)
    })
    patchwork::wrap_plots(p, ncol = 3, guides = "collect")
})

Prop. DE = 0.05

Extract results of interest

Runtimes

## Get runtimes for each celltype
runtimes <- map(res_list, ~ map_dfr(.x, get_runtimes, depth = 1, .id = "method"))

P-values

res_tables <- map_depth(res_list, 2, get_aggregated_rep_tables, depth = 1)
res_tables <- map(res_tables, ~ combine_tables(.x, .id = "method"))

Visualize results

Run times

runtime_plots <- imap(
    runtimes,
    ~ plot_run_times(.x, width = 0.2, height = 0) +
        ggtitle(.y)
)

prop_DE_0.05

P-value distributions

pval_figs <- map(res_tables, ~ pval_hist(.x))

prop_DE_0.05

P-value distributions for non-DE genes

non_de_res <- map2(sce_objects, res_tables, function(sce_list, res) {
    by_rep <- split(res, res$replicate)
    out <- map2(sce_list, by_rep, function(sce, tbl) {
        ## Select only non-DE genes
        non_de <- rownames(sce)[!rowData(sce)$is_DE]
        tbl[tbl$gene %in% non_de, ]
    })
    bind_rows(out, .id = "replicate")
})
non_de_pval_figs <- map(non_de_res, ~ pval_hist(.x))

prop_DE_0.05

Performance evaluation with iCOBRA

Prepare Data

P-values for missing genes are set to 1.

cobra_data <- map2(res_tables, sce_objects, function(res_table, sce_list) {
    ## Split up results per replicate
    res_per_replicate <- split(res_table, res_table$replicate)
    map2(res_per_replicate, sce_list, prepare_COBRAData, replace_missing = TRUE)
})

cobra_perf <- map_depth(cobra_data, 2, calculate_performance, binary_truth = "status")

n_methods <- length(unique(res_tables[[1]]$method)) + 1 # +1 for "truth"
cobra_objects <- map_depth(cobra_perf, 2, prepare_data_for_plot)

TPR plots

no_legend <- theme(legend.position = "none")
tpr_plots <- map(cobra_objects, function(cobra_list) {
    p <- imap(cobra_list, ~ plot_tpr(.x, title = .y) + no_legend)
    patchwork::wrap_plots(p, ncol = 3)
})

prop_DE_0.05

FPR plots

fpr_plots <- map(cobra_objects, function(cobra_list) {
    p <- imap(cobra_list, ~ plot_fpr(.x, title = .y) + no_legend)
    patchwork::wrap_plots(p, ncol = 3)
})

prop_DE_0.05

FDR-TPR curves

fdr_tpr_plots <- map(cobra_objects, function(cobra_list) {
    p <- imap(cobra_list, ~ plot_fdrtprcurve(.x, title = .y))
    patchwork::wrap_plots(p, ncol = 3, guides = "collect")
})

prop_DE_0.05

Zoomed FDR-TPR curves

Same plots but zoomed on the FDR-TPR points region.

fdr_tpr_plots_zoomed <- map(cobra_objects, function(cobra_list) {
    p <- imap(cobra_list, ~ plot_zoomed_fdrtprcurve(.x, title = .y))
    patchwork::wrap_plots(p, ncol = 3, guides = "collect")
})

prop_DE_0.05

Session info

Session info
#> [1] "2023-07-21 19:28:04 CEST"
#> ─ Session info ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.0 (2022-04-22)
#>  os       Red Hat Enterprise Linux 8.6 (Ootpa)
#>  system   x86_64, linux-gnu
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Europe/Paris
#>  date     2023-07-21
#>  pandoc   2.13 @ /apps/gent/RHEL8/zen2-ib/software/Pandoc/2.13/bin/ (via rmarkdown)
#> 
#> ─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#>  ! package              * version   date (UTC) lib source
#>  P argparse             * 2.2.2     2023-02-15 [?] CRAN (R 4.2.0)
#>  P beachmat               2.14.2    2023-04-07 [?] Bioconductor
#>  P beeswarm               0.4.0     2021-06-01 [?] CRAN (R 4.2.0)
#>  P Biobase              * 2.58.0    2022-11-01 [?] Bioconductor
#>  P BiocGenerics         * 0.44.0    2022-11-01 [?] Bioconductor
#>  P BiocManager            1.30.20   2023-02-24 [?] CRAN (R 4.2.0)
#>  P BiocNeighbors          1.16.0    2022-11-01 [?] Bioconductor
#>  P BiocParallel           1.32.6    2023-03-17 [?] Bioconductor
#>  P BiocSingular           1.14.0    2022-11-01 [?] Bioconductor
#>  P bitops                 1.0-7     2021-04-24 [?] CRAN (R 4.2.0)
#>  P bluster                1.8.0     2022-11-01 [?] Bioconductor
#>  P bslib                  0.4.2     2022-12-16 [?] CRAN (R 4.2.0)
#>  P cachem                 1.0.8     2023-05-01 [?] CRAN (R 4.2.0)
#>  P cli                    3.6.1     2023-03-23 [?] CRAN (R 4.2.0)
#>  P cluster                2.1.4     2022-08-22 [?] CRAN (R 4.2.0)
#>  P codetools              0.2-19    2023-02-01 [?] CRAN (R 4.2.0)
#>  P colorspace             2.1-0     2023-01-23 [?] CRAN (R 4.2.0)
#>  P cowplot              * 1.1.1     2020-12-30 [?] CRAN (R 4.2.0)
#>    DDCompanion          * 0.1.15    2023-05-22 [1] local (./package)
#>  P DelayedArray           0.24.0    2022-11-01 [?] Bioconductor
#>  P DelayedMatrixStats     1.20.0    2022-11-01 [?] Bioconductor
#>  P digest                 0.6.31    2022-12-11 [?] CRAN (R 4.2.0)
#>  P dplyr                * 1.1.2     2023-04-20 [?] CRAN (R 4.2.0)
#>  P dqrng                  0.3.0     2021-05-01 [?] CRAN (R 4.2.0)
#>  P DT                     0.28      2023-05-18 [?] CRAN (R 4.2.0)
#>  P edgeR                  3.40.2    2023-01-19 [?] Bioconductor
#>  P ellipsis               0.3.2     2021-04-29 [?] CRAN (R 4.2.0)
#>  P evaluate               0.21      2023-05-05 [?] CRAN (R 4.2.0)
#>  P fansi                  1.0.4     2023-01-22 [?] CRAN (R 4.2.0)
#>  P farver                 2.1.1     2022-07-06 [?] CRAN (R 4.2.0)
#>  P fastmap                1.1.1     2023-02-24 [?] CRAN (R 4.2.0)
#>  P findpython             1.0.8     2023-03-14 [?] CRAN (R 4.2.0)
#>  P forcats              * 1.0.0     2023-01-29 [?] CRAN (R 4.2.0)
#>  P generics               0.1.3     2022-07-05 [?] CRAN (R 4.2.0)
#>  P GenomeInfoDb         * 1.34.9    2023-02-02 [?] Bioconductor
#>  P GenomeInfoDbData       1.2.9     2023-05-20 [?] Bioconductor
#>  P GenomicRanges        * 1.50.2    2022-12-16 [?] Bioconductor
#>  P GGally               * 2.1.2     2021-06-21 [?] CRAN (R 4.2.0)
#>  P ggbeeswarm             0.7.2     2023-04-29 [?] CRAN (R 4.2.0)
#>  P ggplot2              * 3.4.2     2023-04-03 [?] CRAN (R 4.2.0)
#>  P ggrepel                0.9.3     2023-02-03 [?] CRAN (R 4.2.0)
#>  P glue                   1.6.2     2022-02-24 [?] CRAN (R 4.2.0)
#>  P gridExtra              2.3       2017-09-09 [?] CRAN (R 4.2.0)
#>  P gtable                 0.3.3     2023-03-21 [?] CRAN (R 4.2.0)
#>  P here                 * 1.0.1     2020-12-13 [?] CRAN (R 4.2.0)
#>  P highr                  0.10      2022-12-22 [?] CRAN (R 4.2.0)
#>  P hms                    1.1.3     2023-03-21 [?] CRAN (R 4.2.0)
#>  P htmltools              0.5.5     2023-03-23 [?] CRAN (R 4.2.0)
#>  P htmlwidgets            1.6.2     2023-03-17 [?] CRAN (R 4.2.0)
#>  P httpuv                 1.6.11    2023-05-11 [?] CRAN (R 4.2.0)
#>  P httr                   1.4.6     2023-05-08 [?] CRAN (R 4.2.0)
#>  P iCOBRA               * 1.26.0    2022-11-01 [?] Bioconductor
#>  P igraph                 1.4.3     2023-05-22 [?] CRAN (R 4.2.0)
#>  P IRanges              * 2.32.0    2022-11-01 [?] Bioconductor
#>  P irlba                  2.3.5.1   2022-10-03 [?] CRAN (R 4.2.0)
#>  P jquerylib              0.1.4     2021-04-26 [?] CRAN (R 4.2.0)
#>  P jsonlite               1.8.4     2022-12-06 [?] CRAN (R 4.2.0)
#>  P kableExtra           * 1.3.4     2021-02-20 [?] CRAN (R 4.2.0)
#>  P knitr                  1.42      2023-01-25 [?] CRAN (R 4.2.0)
#>  P labeling               0.4.2     2020-10-20 [?] CRAN (R 4.2.0)
#>  P later                  1.3.1     2023-05-02 [?] CRAN (R 4.2.0)
#>  P lattice                0.21-8    2023-04-05 [?] CRAN (R 4.2.0)
#>  P lifecycle              1.0.3     2022-10-07 [?] CRAN (R 4.2.0)
#>  P limma                  3.54.2    2023-02-28 [?] Bioconductor
#>  P locfit                 1.5-9.7   2023-01-02 [?] CRAN (R 4.2.0)
#>  P lubridate            * 1.9.2     2023-02-10 [?] CRAN (R 4.2.0)
#>  P magrittr               2.0.3     2022-03-30 [?] CRAN (R 4.2.0)
#>  P Matrix                 1.5-4.1   2023-05-18 [?] CRAN (R 4.2.0)
#>  P MatrixGenerics       * 1.10.0    2022-11-01 [?] Bioconductor
#>  P matrixStats          * 0.63.0    2022-11-18 [?] CRAN (R 4.2.0)
#>  P metapod                1.6.0     2022-11-01 [?] Bioconductor
#>  P mime                   0.12      2021-09-28 [?] CRAN (R 4.2.0)
#>  P munsell                0.5.0     2018-06-12 [?] CRAN (R 4.2.0)
#>  P patchwork              1.1.2     2022-08-19 [?] CRAN (R 4.2.0)
#>  P pillar                 1.9.0     2023-03-22 [?] CRAN (R 4.2.0)
#>  P pkgconfig              2.0.3     2019-09-22 [?] CRAN (R 4.2.0)
#>  P plyr                   1.8.8     2022-11-11 [?] CRAN (R 4.2.0)
#>  P promises               1.2.0.1   2021-02-11 [?] CRAN (R 4.2.0)
#>  P purrr                * 1.0.1     2023-01-10 [?] CRAN (R 4.2.0)
#>  P R6                     2.5.1     2021-08-19 [?] CRAN (R 4.2.0)
#>  P RColorBrewer           1.1-3     2022-04-03 [?] CRAN (R 4.2.0)
#>  P Rcpp                   1.0.10    2023-01-22 [?] CRAN (R 4.2.0)
#>  P RCurl                  1.98-1.12 2023-03-27 [?] CRAN (R 4.2.0)
#>  P readr                * 2.1.4     2023-02-10 [?] CRAN (R 4.2.0)
#>    renv                   0.17.3    2023-04-06 [1] CRAN (R 4.2.0)
#>  P reshape                0.8.9     2022-04-12 [?] CRAN (R 4.2.0)
#>  P reshape2               1.4.4     2020-04-09 [?] CRAN (R 4.2.0)
#>  P rlang                  1.1.1     2023-04-28 [?] CRAN (R 4.2.0)
#>  P rmarkdown              2.21      2023-03-26 [?] CRAN (R 4.2.0)
#>  P ROCR                   1.0-11    2020-05-02 [?] CRAN (R 4.2.0)
#>  P rprojroot              2.0.3     2022-04-02 [?] CRAN (R 4.2.0)
#>  P rstudioapi             0.14      2022-08-22 [?] CRAN (R 4.2.0)
#>  P rsvd                   1.0.5     2021-04-16 [?] CRAN (R 4.2.0)
#>  P rvest                  1.0.3     2022-08-19 [?] CRAN (R 4.2.0)
#>  P S4Vectors            * 0.36.2    2023-02-26 [?] Bioconductor
#>  P sass                   0.4.6     2023-05-03 [?] CRAN (R 4.2.0)
#>  P ScaledMatrix           1.6.0     2022-11-01 [?] Bioconductor
#>  P scales                 1.2.1     2022-08-20 [?] CRAN (R 4.2.0)
#>  P scater               * 1.26.1    2022-11-13 [?] Bioconductor
#>  P scran                * 1.26.2    2023-01-19 [?] Bioconductor
#>  P scuttle              * 1.8.4     2023-01-19 [?] Bioconductor
#>  P sessioninfo            1.2.2     2021-12-06 [?] CRAN (R 4.2.0)
#>  P shiny                  1.7.4     2022-12-15 [?] CRAN (R 4.2.0)
#>  P shinyBS                0.61.1    2022-04-17 [?] CRAN (R 4.2.0)
#>  P shinydashboard         0.7.2     2021-09-30 [?] CRAN (R 4.2.0)
#>  P SingleCellExperiment * 1.20.1    2023-03-17 [?] Bioconductor
#>  P sparseMatrixStats      1.10.0    2022-11-01 [?] Bioconductor
#>  P statmod                1.5.0     2023-01-06 [?] CRAN (R 4.2.0)
#>  P stringi                1.7.12    2023-01-11 [?] CRAN (R 4.2.0)
#>  P stringr              * 1.5.0     2022-12-02 [?] CRAN (R 4.2.0)
#>  P SummarizedExperiment * 1.28.0    2022-11-01 [?] Bioconductor
#>  P svglite                2.1.1     2023-01-10 [?] CRAN (R 4.2.0)
#>  P systemfonts            1.0.4     2022-02-11 [?] CRAN (R 4.2.0)
#>  P tibble               * 3.2.1     2023-03-20 [?] CRAN (R 4.2.0)
#>  P tidyr                * 1.3.0     2023-01-24 [?] CRAN (R 4.2.0)
#>  P tidyselect             1.2.0     2022-10-10 [?] CRAN (R 4.2.0)
#>  P tidyverse            * 2.0.0     2023-02-22 [?] CRAN (R 4.2.0)
#>  P timechange             0.2.0     2023-01-11 [?] CRAN (R 4.2.0)
#>  P tzdb                   0.4.0     2023-05-12 [?] CRAN (R 4.2.0)
#>  P UpSetR                 1.4.0     2019-05-22 [?] CRAN (R 4.2.0)
#>  P utf8                   1.2.3     2023-01-31 [?] CRAN (R 4.2.0)
#>  P vctrs                  0.6.2     2023-04-19 [?] CRAN (R 4.2.0)
#>  P vipor                  0.4.5     2017-03-22 [?] CRAN (R 4.2.0)
#>  P viridis                0.6.3     2023-05-03 [?] CRAN (R 4.2.0)
#>  P viridisLite            0.4.2     2023-05-02 [?] CRAN (R 4.2.0)
#>  P webshot                0.5.4     2022-09-26 [?] CRAN (R 4.2.0)
#>  P withr                  2.5.0     2022-03-03 [?] CRAN (R 4.2.0)
#>  P xfun                   0.39      2023-04-20 [?] CRAN (R 4.2.0)
#>  P xml2                   1.3.4     2023-04-27 [?] CRAN (R 4.2.0)
#>  P xtable                 1.8-4     2019-04-21 [?] CRAN (R 4.2.0)
#>  P XVector                0.38.0    2022-11-01 [?] Bioconductor
#>  P yaml                   2.3.7     2023-01-23 [?] CRAN (R 4.2.0)
#>  P zlibbioc               1.44.0    2022-11-01 [?] Bioconductor
#> 
#>  [1] /kyukon/data/gent/vo/000/gvo00063/Jeroen/DD_project_v2/benchmarks/covid/renv/library/R-4.2/x86_64-pc-linux-gnu
#>  [2] /kyukon/home/gent/460/vsc46052/.cache/R/renv/sandbox/R-4.2/x86_64-pc-linux-gnu/4df86545
#> 
#>  P ── Loaded and on-disk path mismatch.
#> 
#> ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────